## Understanding Variation in Start-Up Funds
## Emily M. Farris, Ellen M. Key, and Jane L. Sumner
## Replication contact: Jane L. Sumner (jlsumner@umn.edu)

library(ggplot2)
library(stringr)
library(survey)
library(stargazer)
library(xtable)

# Survey responses. Empty strings and "NA" are both read as missing.
startup <- read.csv("startup-data.csv",
                    stringsAsFactors = FALSE, na.strings = c("", "NA"))

# Flags the rows of `dat` in which at least one of the columns whose name
# matches `pattern` holds the answer "Yes, out of general start-up funds".
# Missing answers never count as a match, so the result contains no NA.
# `drop = FALSE` keeps the selection a data frame even if only one column
# matches the pattern.
got_general_funds <- function(dat, pattern) {
  items <- dat[, grepl(pattern, names(dat)), drop = FALSE]
  rowSums(items == "Yes, out of general start-up funds", na.rm = TRUE) > 0
}

# This restricts to the data set in which people received *any* of the items
# out of their general start-up funds
# (i.e., respondents are retained if the count of Q7 items they received out
# of general start-up funds is more than zero)
only.money0 <- startup[got_general_funds(startup, "Q7"), ]


# This restricts the data set further, to only the expenses most explicitly
# tied to research (so if someone got, say, a TA out of their start-up,
# but only that, or relocation, they're not in here)
only.money1 <- startup[
  got_general_funds(
    startup,
    "Q7_3|Q7_4|Q7_5|Q7_6|Q7_8|Q7_9|Q7_10|Q7_11|Q7_13"
  ),
]

# Collapses a raw gender column into "WOMAN", "MAN" and "NEITHER".
# Values outside the listed categories (including NA) are passed through
# unchanged. The input is coerced to character first so that a factor column
# is recoded by label rather than by its integer codes.
recode_gender <- function(gender) {
  gender <- as.character(gender)
  neither <- c("Non-binary", "Other", "Prefer not to disclose", "Transgender")
  ifelse(gender %in% neither, "NEITHER",
         ifelse(gender == "Female", "WOMAN",
                ifelse(gender == "Male", "MAN", gender)))
}

# data from APSA on its member demographics
# stringsAsFactors = FALSE keeps gender as character on every R version.
demog <- read.csv("apsa-demographics.csv", na.strings = "",
                  stringsAsFactors = FALSE)
table(demog$gender) / sum(table(demog$gender)) * 100
demog$gender2 <- recode_gender(demog$gender)

# Second demographics file (referred to as "R1" in the weighting code), read
# with the same missing-value convention as the file above so that blank
# entries are dropped from the shares rather than counted as a category.
demog2 <- read.csv("demog-R1s.csv", na.strings = "",
                   stringsAsFactors = FALSE)
demog2$gender2 <- recode_gender(demog2$gender)
table(demog2$gender2) / sum(table(demog2$gender2)) * 100



##### Setting up the survey weights from the APSA data
# Each respondent's weight is the population share of their gender group
# divided by that group's share of the analysis sample (population/sample).
# Respondents whose `woman` value is missing get weight 0.

# Share of the "WOMAN" category in a recoded gender vector. The category is
# looked up by name, so the result does not depend on which other categories
# happen to be present (a missing "WOMAN" category fails loudly).
share_woman <- function(gender2) {
  shares <- prop.table(table(gender2))
  as.numeric(shares["WOMAN"])
}

# Per-respondent weight: `wt_woman` where woman == 1, `wt_man` for other
# non-missing values, and 0 where `woman` is missing.
gender_weight <- function(woman, wt_woman, wt_man) {
  wt <- ifelse(woman == 1, wt_woman, wt_man)
  ifelse(is.na(wt), 0, wt)
}

# Population shares: all APSA members, and the R1 demographics file.
apsa.pctW_all <- share_woman(demog$gender2)
apsa.pctNW_all <- 1 - apsa.pctW_all

apsa.pctW_R1 <- share_woman(demog2$gender2)
apsa.pctNW_R1 <- 1 - apsa.pctW_R1

# The restricted samples are weighted towards the same two populations.
apsa.pctW_restrict1 <- apsa.pctW_all
apsa.pctNW_restrict1 <- apsa.pctNW_all

apsa.pctW_restrict2 <- apsa.pctW_all
apsa.pctNW_restrict2 <- apsa.pctNW_all

apsa.pctW_R1_restrict1 <- apsa.pctW_R1
apsa.pctNW_R1_restrict1 <- apsa.pctNW_R1

apsa.pctW_R1_restrict2 <- apsa.pctW_R1
apsa.pctNW_R1_restrict2 <- apsa.pctNW_R1

# Sample shares of women, computed on exactly the rows each design uses.
# NOTE(review): the two R1 restricted shares previously used the whole
# restricted sample; they now use its PhD == 1 rows, matching how wt_R1 is
# built and which rows the R1 restricted designs contain.
samp.pctW_all <- mean(startup$woman, na.rm = TRUE)
samp.pctW_R1 <- mean(startup$woman[which(startup$PhD == 1)], na.rm = TRUE)
samp.pctW_restrict1 <- mean(only.money0$woman, na.rm = TRUE)
samp.pctW_restrict2 <- mean(only.money1$woman, na.rm = TRUE)
samp.pctW_R1_restrict1 <-
  mean(only.money0$woman[which(only.money0$PhD == 1)], na.rm = TRUE)
samp.pctW_R1_restrict2 <-
  mean(only.money1$woman[which(only.money1$PhD == 1)], na.rm = TRUE)

# population/sample
wtW_all <- apsa.pctW_all / samp.pctW_all
wtM_all <- apsa.pctNW_all / (1 - samp.pctW_all)

wtW_R1 <- apsa.pctW_R1 / samp.pctW_R1
wtM_R1 <- apsa.pctNW_R1 / (1 - samp.pctW_R1)

wtW_restrict1 <- apsa.pctW_restrict1 / samp.pctW_restrict1
wtM_restrict1 <- apsa.pctNW_restrict1 / (1 - samp.pctW_restrict1)

wtW_restrict2 <- apsa.pctW_restrict2 / samp.pctW_restrict2
wtM_restrict2 <- apsa.pctNW_restrict2 / (1 - samp.pctW_restrict2)

wtW_R1_restrict1 <- apsa.pctW_R1_restrict1 / samp.pctW_R1_restrict1
wtM_R1_restrict1 <- apsa.pctNW_R1_restrict1 / (1 - samp.pctW_R1_restrict1)

wtW_R1_restrict2 <- apsa.pctW_R1_restrict2 / samp.pctW_R1_restrict2
wtM_R1_restrict2 <- apsa.pctNW_R1_restrict2 / (1 - samp.pctW_R1_restrict2)

# Attach the weights as columns of the data frames they belong to.
startup$wt_all <- gender_weight(startup$woman, wtW_all, wtM_all)
startup$wt_R1 <- gender_weight(startup$woman, wtW_R1, wtM_R1)

only.money0$wt_restrict1 <-
  gender_weight(only.money0$woman, wtW_restrict1, wtM_restrict1)
only.money0$wt_R1_restrict1 <-
  gender_weight(only.money0$woman, wtW_R1_restrict1, wtM_R1_restrict1)

# wt_restrict2 was computed but never attached, which left svy.dat_restrict2
# without weights.
only.money1$wt_restrict2 <-
  gender_weight(only.money1$woman, wtW_restrict2, wtM_restrict2)
only.money1$wt_R1_restrict2 <-
  gender_weight(only.money1$woman, wtW_R1_restrict2, wtM_R1_restrict2)

# Row identifiers used as the sampling-unit id of each design.
startup$id <- seq_len(nrow(startup))
startup.nomissing <- subset(startup, !is.na(startup2))
only.money0$id <- seq_len(nrow(only.money0))
only.money1$id <- seq_len(nrow(only.money1))

# Survey designs. Weights are referenced by their full column name: `$wt`
# would rely on partial matching, which is ambiguous between wt_all and wt_R1.
svy.dat_all <- svydesign(ids = ~id, data = startup, weights = ~wt_all)
svy.dat_R1 <- svydesign(ids = ~id,
                        data = startup[which(startup$PhD == 1), ],
                        weights = ~wt_R1)
svy.dat_restrict1 <- svydesign(ids = ~id, data = only.money0,
                               weights = ~wt_restrict1)
svy.dat_restrict2 <- svydesign(ids = ~id, data = only.money1,
                               weights = ~wt_restrict2)
svy.dat_R1_restrict1 <- svydesign(
  ids = ~id,
  data = only.money0[which(only.money0$PhD == 1), ],
  weights = ~wt_R1_restrict1
)
svy.dat_R1_restrict2 <- svydesign(
  ids = ~id,
  data = only.money1[which(only.money1$PhD == 1), ],
  weights = ~wt_R1_restrict2
)


# Table 1
# mod1-mod2: quasibinomial logit models of didyouget2 on the full design.
# mod3-mod5: models of log(totalmoney) with svyglm's default family; mod5
# uses the R1 restricted design and therefore has no PhD term.

mod1 <- svyglm(
  didyouget2 ~ publicprivate + PhD +
    articles + otheroffers2 +
    woman +
    ABD +
    priorTT +
    anyfamily,
  design = svy.dat_all,
  family = quasibinomial(link = "logit")
)

mod2 <- svyglm(
  didyouget2 ~ publicprivate + PhD +
    articles + otheroffers2 +
    usnews_invert +
    woman +
    ABD +
    priorTT +
    anyfamily,
  design = svy.dat_all,
  family = quasibinomial(link = "logit")
)

mod3 <- svyglm(
  log(totalmoney) ~ publicprivate + PhD * woman +
    articles * woman + otheroffers2 * woman +
    woman +
    ABD +
    priorTT +
    anyfamily,
  design = svy.dat_restrict2
)

mod4 <- svyglm(
  log(totalmoney) ~ publicprivate + PhD * woman +
    articles * woman + otheroffers2 * woman +
    usnews_invert * woman +
    woman +
    ABD +
    priorTT +
    anyfamily,
  design = svy.dat_restrict2
)

mod5 <- svyglm(
  log(totalmoney) ~ publicprivate +
    articles * woman + otheroffers2 * woman +
    usnews_invert * woman +
    woman +
    ABD +
    priorTT +
    anyfamily,
  design = svy.dat_R1_restrict2
)

stargazer(mod1, mod2, mod3, mod4, mod5, type = "text")


# Table B.1:
# For every column whose name contains "Q7", the percentage of non-missing
# answers falling in each response category (one row per item).
q7_columns <- grep("Q7", names(startup))
response_pct <- function(x) {
  counts <- table(x)
  counts / sum(counts) * 100
}
items <- t(apply(startup[, q7_columns], 2, response_pct))

print(xtable(items))



# Table C.1
# Percentage of non-missing values of `variable` that fall in its second
# category (categories in the order produced by table()), rounded to one
# decimal place of a percent.
tablec1 <- function(variable) {
  counts <- table(variable)
  shares <- counts / sum(counts)
  round(shares[2], 3) * 100
}
# Second-category percentages for the sample characteristics.
tablec1(startup$woman)
tablec1(startup$white)
tablec1(startup$black)
tablec1(startup$latinx)
tablec1(startup$asian)
tablec1(startup$mena)
tablec1(startup$native)
# highestdegree is reported for its second, third and first category in turn.
tablec1(startup$highestdegree)
round(prop.table(table(startup$highestdegree))[3], 3) * 100
round(prop.table(table(startup$highestdegree))[1], 3) * 100
tablec1(startup$publicprivate)
tablec1(startup$anyfamily)
tablec1(startup$ABD)
tablec1(startup$priorTT)
tablec1(startup$otheroffers2)
# Numeric variables get a five-number summary plus mean instead.
summary(startup$articles)
summary(startup$usnews_invert)


# Table C.2
# Gender shares (in percent, two decimals) side by side: APSA file, full
# sample, R1 file, and the PhD == 1 part of the sample.
pct_share <- function(x) {
  round(table(x) / sum(table(x)), 4) * 100
}
gender_phd_sample <- startup$gender[which(startup$PhD == 1)]
cbind(
  pct_share(demog$gender2),
  pct_share(startup$gender),
  pct_share(demog2$gender2),
  pct_share(gender_phd_sample[seq_along(gender_phd_sample)])
)

# Figure C.1
# Histogram of total funding in the research-restricted sample, with the
# median (solid line) and the quartiles (dashed lines) marked.
total_funds <- only.money1$totalmoney
hist(total_funds,
     main = "How much will you (or did you) receive
     in total over your probationary period?",
     xlab = "Total Funding in Probationary Period (in USD)")
abline(v = median(total_funds, na.rm = TRUE), lwd = 2)
abline(v = quantile(total_funds, probs = c(.25, .75), na.rm = TRUE),
       lwd = 2, lty = 2)
quantile(total_funds, probs = c(.25, .5, .75, 1), na.rm = TRUE)

# Table C.3
# One row of descriptive statistics.
#   group1: categorical vector; the percentage of non-missing values in its
#           first category (table() order) is reported.
#   group2: numeric vector; its mean, median, minimum, maximum and standard
#           deviation (missing values removed) are reported.
# Returns a numeric vector of length six in that order.
desc.table <- function(group1, group2) {
  counts <- table(group1)
  first_pct <- (counts / sum(counts) * 100)[1]
  centre_range <- summary(group2)[c("Mean", "Median", "Min.", "Max.")]
  spread <- sd(group2, na.rm = TRUE)
  c(first_pct, centre_range, spread)
}


# Builds one Table C.3 row for a subgroup. `in_group` takes a data frame and
# returns a logical vector; it is applied to `startup` (for didyouget2) and
# to `only.money1` (for totalmoney) separately.
c3_row <- function(in_group) {
  desc.table(startup$didyouget2[which(in_group(startup))],
             only.money1$totalmoney[which(in_group(only.money1))])
}

# Rows: everyone, PhD == 1, PhD == 0, public, private, then four bands of
# usnews_invert from highest to lowest.
c3_rows <- list(
  desc.table(startup$didyouget2, only.money1$totalmoney),
  c3_row(function(d) d$PhD == 1),
  c3_row(function(d) d$PhD == 0),
  c3_row(function(d) d$publicprivate == "Public"),
  c3_row(function(d) d$publicprivate == "Private"),
  c3_row(function(d) d$usnews_invert >= 75),
  c3_row(function(d) d$usnews_invert >= 50 & d$usnews_invert < 75),
  c3_row(function(d) d$usnews_invert >= 25 & d$usnews_invert < 50),
  c3_row(function(d) d$usnews_invert < 25)
)

print(xtable(do.call(rbind, c3_rows)))


# Figure D.2
# Unweighted versions of the two funding models, used for the point
# predictions in the figures below.

# All institutions in the research-restricted sample.
mod4_unweighted_restrict2 <- lm(
  log(totalmoney) ~
    publicprivate + PhD * woman +
    articles * woman + otheroffers2 * woman +
    usnews_invert * woman +
    woman +
    ABD +
    priorTT +
    anyfamily,
  data = only.money1
)

# PhD-granting institutions only. The specification has no PhD term, and the
# bootstrap for the corresponding figure resamples PhD == 1 rows, so the model
# is fit on that subset rather than on all of only.money1.
mod5_unweighted_restrict2 <- lm(
  log(totalmoney) ~
    publicprivate +
    articles * woman + otheroffers2 * woman +
    usnews_invert * woman +
    woman +
    ABD +
    priorTT +
    anyfamily,
  data = only.money1[which(only.money1$PhD == 1), ]
)

# Predicted funds by alma mater prestige and gender (all institutions), with
# bootstrap intervals around the predictions of mod4_unweighted_restrict2.

# Fixed seed so the bootstrap intervals are identical from run to run.
set.seed(20210601)

n_boot <- 1000
rank_grid <- seq(min(only.money1$usnews_invert, na.rm = TRUE),
                 max(only.money1$usnews_invert, na.rm = TRUE), 1)

# Design matrix for a hypothetical candidate at every value of `ranks`:
# public institution, PhD == 1, mean article count, no other offers, ABD,
# no prior tenure-track job, no family. Interaction columns are computed
# from the main-effect values so the two can never disagree. Column order
# must match the coefficient order of the model (main effects in formula
# order, then the interactions with woman).
d2_profile_all <- function(woman, ranks, mean_articles) {
  phd_value <- 1
  offers_value <- 0
  cbind(Intercept = 1,
        Public = 1,
        PhD = phd_value,
        woman = woman,
        articles = mean_articles,
        otheroffers2 = offers_value,
        usnewsinvert = ranks,
        ABD = 1,
        priorTT = 0,
        anyfamily = 0,
        PhDwoman = phd_value * woman,
        womenarticles = mean_articles * woman,
        womanoffers = offers_value * woman,
        womenrank = ranks * woman)
}

hypW <- d2_profile_all(1, rank_grid,
                       mean(only.money1$articles, na.rm = TRUE))
hypM <- d2_profile_all(0, rank_grid,
                       mean(only.money1$articles, na.rm = TRUE))

# One column of predictions (log scale) per bootstrap replicate.
ppW2 <- matrix(NA_real_, nrow = length(rank_grid), ncol = n_boot)
ppM2 <- matrix(NA_real_, nrow = length(rank_grid), ncol = n_boot)

for (i in seq_len(n_boot)) {
  boot_rows <- sample(seq_len(nrow(only.money1)), nrow(only.money1),
                      replace = TRUE)
  boot <- only.money1[boot_rows, ]
  bootmod1 <- lm(log(totalmoney) ~
                   publicprivate + PhD * woman +
                   articles * woman + otheroffers2 * woman +
                   usnews_invert * woman +
                   woman +
                   ABD +
                   priorTT +
                   anyfamily, data = boot)

  ppW2[, i] <- hypW %*% coef(bootmod1)
  ppM2[, i] <- hypM %*% coef(bootmod1)
}

# Point predictions from the model fit on the observed data.
predW <- hypW %*% coef(mod4_unweighted_restrict2)
predM <- hypM %*% coef(mod4_unweighted_restrict2)

# Interval bounds are the 8th and 92nd percentiles of the bootstrap
# predictions at each rank.
df2 <- data.frame(
  ranks = rep(rank_grid, 2),
  pred = c(predW, predM),
  Gender = c(rep("Woman", length(predW)), rep("Man", length(predM))),
  predlow = c(apply(ppW2, 1, quantile, .08), apply(ppM2, 1, quantile, .08)),
  predhigh = c(apply(ppW2, 1, quantile, .92), apply(ppM2, 1, quantile, .92)),
  dolpred = c(exp(predW), exp(predM))
)

# Predictions and bounds are exponentiated, so the y axis is in dollars.
# geom_segment inherits the colour mapping; it has no fill aesthetic.
p <- ggplot(df2, aes(x = ranks, y = dolpred, color = Gender)) +
  geom_point() +
  geom_segment(aes(x = ranks, xend = ranks,
                   y = exp(predlow), yend = exp(predhigh))) +
  labs(title = "Funds by Alma Mater Prestige and Gender",
       x = "Alma Mater Prestige",
       y = "Predicted Funds (USD)") +
  coord_cartesian(ylim = c(0, 40000))
p

## visualizing model with only PhD granting schools
## bootstrapping CIs

# Fixed seed so the bootstrap intervals are identical from run to run.
set.seed(20210602)

n_boot_phd <- 1000
# The PhD == 1 subset does not change between replicates, so it is built once.
phd <- only.money1[which(only.money1$PhD == 1), ]
rank_grid_phd <- seq(min(phd$usnews_invert, na.rm = TRUE),
                     max(phd$usnews_invert, na.rm = TRUE), 1)

# Design matrix for a hypothetical candidate at every value of `ranks`:
# public institution, mean article count, no other offers, ABD, no prior
# tenure-track job, no family. Interaction columns are computed from the
# main-effect values so the two can never disagree. Column order must match
# the coefficient order of the model (main effects in formula order, then the
# interactions with woman); this specification has no PhD term.
d2_profile_phd <- function(woman, ranks, mean_articles) {
  offers_value <- 0
  cbind(Intercept = 1,
        Public = 1,
        articles = mean_articles,
        woman = woman,
        otheroffers2 = offers_value,
        usnewsinvert = ranks,
        ABD = 1,
        priorTT = 0,
        anyfamily = 0,
        womenarticles = mean_articles * woman,
        womanoffers = offers_value * woman,
        womenrank = ranks * woman)
}

hypW <- d2_profile_phd(1, rank_grid_phd, mean(phd$articles, na.rm = TRUE))
hypM <- d2_profile_phd(0, rank_grid_phd, mean(phd$articles, na.rm = TRUE))

# One column of predictions (log scale) per bootstrap replicate.
ppW2 <- matrix(NA_real_, nrow = length(rank_grid_phd), ncol = n_boot_phd)
ppM2 <- matrix(NA_real_, nrow = length(rank_grid_phd), ncol = n_boot_phd)

for (i in seq_len(n_boot_phd)) {
  boot <- phd[sample(seq_len(nrow(phd)), nrow(phd), replace = TRUE), ]
  bootmod1 <- lm(log(totalmoney) ~
                   publicprivate +
                   articles * woman + otheroffers2 * woman +
                   usnews_invert * woman +
                   woman +
                   ABD +
                   priorTT +
                   anyfamily,
                 data = boot)

  ppW2[, i] <- hypW %*% coef(bootmod1)
  ppM2[, i] <- hypM %*% coef(bootmod1)
}

# Point predictions from the PhD-only model.
predW <- hypW %*% coef(mod5_unweighted_restrict2)
predM <- hypM %*% coef(mod5_unweighted_restrict2)

# Interval bounds are the 8th and 92nd percentiles of the bootstrap
# predictions at each rank. `ranks` is repeated explicitly for the two gender
# groups rather than left to silent recycling.
df2 <- data.frame(
  ranks = rep(rank_grid_phd, 2),
  pred = c(predW, predM),
  Gender = c(rep("Woman", length(predW)), rep("Man", length(predM))),
  predlow = c(apply(ppW2, 1, quantile, .08), apply(ppM2, 1, quantile, .08)),
  predhigh = c(apply(ppW2, 1, quantile, .92), apply(ppM2, 1, quantile, .92)),
  dolpred = c(exp(predW), exp(predM))
)

# Predictions and bounds are exponentiated, so the y axis is in dollars.
# geom_segment inherits the colour mapping; it has no fill aesthetic.
p <- ggplot(df2, aes(x = ranks, y = dolpred, color = Gender)) +
  geom_point() +
  geom_segment(aes(x = ranks, xend = ranks,
                   y = exp(predlow), yend = exp(predhigh))) +
  labs(title = "Funds by Alma Mater Prestige and Gender",
       x = "Alma Mater Prestige",
       y = "Predicted Funds (USD)") +
  coord_cartesian(ylim = c(0, 40000))
p

# Table E.4
# The first Table 1 specification fit four ways: outcome didyouget2
# ("restricted") or startup2 ("unrestricted"), each with survey weights
# (svyglm, quasibinomial) and without (glm, binomial).

mod1_restricted <- svyglm(
  didyouget2 ~
    publicprivate + PhD +
    articles + otheroffers2 +
    woman +
    ABD +
    priorTT +
    anyfamily,
  design = svy.dat_all,
  family = quasibinomial(link = "logit")
)

mod1_restricted_unweighted <- glm(
  didyouget2 ~
    publicprivate + PhD +
    articles + otheroffers2 +
    woman +
    ABD +
    priorTT +
    anyfamily,
  data = startup,
  family = binomial(link = "logit")
)

mod1_unrestricted_weighted <- svyglm(
  startup2 ~
    publicprivate + PhD +
    articles + otheroffers2 +
    woman +
    ABD +
    priorTT +
    anyfamily,
  design = svy.dat_all,
  family = quasibinomial(link = "logit")
)

mod1_unrestricted_unweighted <- glm(
  startup2 ~
    publicprivate + PhD +
    articles + otheroffers2 +
    woman +
    ABD +
    priorTT +
    anyfamily,
  data = startup,
  family = binomial(link = "logit")
)

stargazer(mod1_restricted, mod1_restricted_unweighted,
          mod1_unrestricted_weighted, mod1_unrestricted_unweighted,
          type = "text", no.space = TRUE)


# Table E.5
# The second Table 1 specification (adds usnews_invert) fit four ways:
# outcome didyouget2 or startup2, each with and without survey weights.

mod2_restricted <- svyglm(
  didyouget2 ~
    publicprivate + PhD +
    articles + otheroffers2 +
    usnews_invert +
    woman +
    ABD +
    priorTT +
    anyfamily,
  design = svy.dat_all,
  family = quasibinomial(link = "logit")
)

mod2_restricted_unweighted <- glm(
  didyouget2 ~
    publicprivate + PhD +
    articles + otheroffers2 +
    usnews_invert +
    woman +
    ABD +
    priorTT +
    anyfamily,
  data = startup,
  family = binomial(link = "logit")
)

mod2_unrestricted_weighted <- svyglm(
  startup2 ~
    publicprivate + PhD +
    articles + otheroffers2 +
    usnews_invert +
    woman +
    ABD +
    priorTT +
    anyfamily,
  design = svy.dat_all,
  family = quasibinomial(link = "logit")
)

mod2_unrestricted_unweighted <- glm(
  startup2 ~
    publicprivate + PhD +
    articles + otheroffers2 +
    usnews_invert +
    woman +
    ABD +
    priorTT +
    anyfamily,
  data = startup,
  family = binomial(link = "logit")
)

stargazer(mod2_restricted, mod2_restricted_unweighted,
          mod2_unrestricted_weighted, mod2_unrestricted_unweighted,
          type = "text", no.space = TRUE)



# Table E.6
# The third Table 1 specification fit on the research-restricted sample
# (only.money1) and on the full sample (startup), each with survey weights
# (svyglm) and without (lm).
mod3_restricted <- svyglm(
  log(totalmoney) ~
    publicprivate + PhD * woman +
    articles * woman + otheroffers2 * woman +
    woman +
    ABD +
    priorTT +
    anyfamily,
  design = svy.dat_restrict2
)

mod3_restricted_unweighted <- lm(
  log(totalmoney) ~
    publicprivate + PhD * woman +
    articles * woman + otheroffers2 * woman +
    woman +
    ABD +
    priorTT +
    anyfamily,
  data = only.money1
)

mod3_unrestricted_weighted <- svyglm(
  log(totalmoney) ~
    publicprivate + PhD * woman +
    articles * woman + otheroffers2 * woman +
    woman +
    ABD +
    priorTT +
    anyfamily,
  design = svy.dat_all
)

mod3_unrestricted_unweighted <- lm(
  log(totalmoney) ~
    publicprivate + PhD * woman +
    articles * woman + otheroffers2 * woman +
    woman +
    ABD +
    priorTT +
    anyfamily,
  data = startup
)

stargazer(mod3_restricted, mod3_restricted_unweighted,
          mod3_unrestricted_weighted, mod3_unrestricted_unweighted,
          type = "text", no.space = TRUE)


# Table E.7
# The fourth Table 1 specification (adds usnews_invert and its interaction
# with woman) fit on the research-restricted sample and on the full sample,
# each with and without survey weights.

mod4_restricted <- svyglm(
  log(totalmoney) ~
    publicprivate + PhD * woman +
    articles * woman + otheroffers2 * woman +
    usnews_invert * woman +
    woman +
    ABD +
    priorTT +
    anyfamily,
  design = svy.dat_restrict2
)

mod4_restricted_unweighted <- lm(
  log(totalmoney) ~
    publicprivate + PhD * woman +
    articles * woman + otheroffers2 * woman +
    usnews_invert * woman +
    woman +
    ABD +
    priorTT +
    anyfamily,
  data = only.money1
)

mod4_unrestricted_weighted <- svyglm(
  log(totalmoney) ~
    publicprivate + PhD * woman +
    articles * woman + otheroffers2 * woman +
    usnews_invert * woman +
    woman +
    ABD +
    priorTT +
    anyfamily,
  design = svy.dat_all
)

mod4_unrestricted_unweighted <- lm(
  log(totalmoney) ~
    publicprivate + PhD * woman +
    articles * woman + otheroffers2 * woman +
    usnews_invert * woman +
    woman +
    ABD +
    priorTT +
    anyfamily,
  data = startup
)

stargazer(mod4_restricted, mod4_restricted_unweighted,
          mod4_unrestricted_weighted, mod4_unrestricted_unweighted,
          type = "text", no.space = TRUE)


# Table E.8
# The fifth Table 1 specification (PhD == 1 rows only, hence no PhD term) fit
# on the research-restricted sample and on the full sample, each with survey
# weights (svyglm) and without (lm). All four columns use PhD == 1 rows.
mod5_restricted <- svyglm(
  log(totalmoney) ~
    publicprivate +
    articles * woman + otheroffers2 * woman +
    usnews_invert * woman +
    woman +
    ABD +
    priorTT +
    anyfamily,
  design = svy.dat_R1_restrict2
)

# Unweighted counterpart of mod5_restricted: fit on the same PhD == 1 rows of
# only.money1 that svy.dat_R1_restrict2 contains, not on all of only.money1.
mod5_restricted_unweighted <- lm(
  log(totalmoney) ~
    publicprivate +
    articles * woman + otheroffers2 * woman +
    usnews_invert * woman +
    woman +
    ABD +
    priorTT +
    anyfamily,
  data = only.money1[which(only.money1$PhD == 1), ]
)

mod5_unrestricted_weighted <- svyglm(
  log(totalmoney) ~
    publicprivate +
    articles * woman + otheroffers2 * woman +
    usnews_invert * woman +
    woman +
    ABD +
    priorTT +
    anyfamily,
  design = svy.dat_R1
)

mod5_unrestricted_unweighted <- lm(
  log(totalmoney) ~
    publicprivate +
    articles * woman + otheroffers2 * woman +
    usnews_invert * woman +
    woman +
    ABD +
    priorTT +
    anyfamily,
  data = startup[which(startup$PhD == 1), ]
)

stargazer(mod5_restricted, mod5_restricted_unweighted,
          mod5_unrestricted_weighted, mod5_unrestricted_unweighted,
          type = "text", no.space = TRUE)



# Table F.9
# Unweighted models of log(totalmoney) fit separately by gender on PhD == 1
# rows: columns 1-2 use the full sample (men, women), columns 3-4 the
# research-restricted sample (men, women).
f9_col1 <- lm(
  log(totalmoney) ~
    publicprivate +
    articles + otheroffers2 +
    usnews_invert +
    ABD +
    priorTT +
    anyfamily,
  data = startup[which(startup$PhD == 1 & startup$gender == "MAN"), ]
)

f9_col2 <- lm(
  log(totalmoney) ~
    publicprivate +
    articles + otheroffers2 +
    usnews_invert +
    ABD +
    priorTT +
    anyfamily,
  data = startup[which(startup$PhD == 1 & startup$gender == "WOMAN"), ]
)

f9_col3 <- lm(
  log(totalmoney) ~
    publicprivate +
    articles + otheroffers2 +
    usnews_invert +
    ABD +
    priorTT +
    anyfamily,
  data = only.money1[which(only.money1$PhD == 1 &
                             only.money1$gender == "MAN"), ]
)

f9_col4 <- lm(
  log(totalmoney) ~
    publicprivate +
    articles + otheroffers2 +
    usnews_invert +
    ABD +
    priorTT +
    anyfamily,
  data = only.money1[which(only.money1$PhD == 1 &
                             only.money1$gender == "WOMAN"), ]
)

stargazer(f9_col1, f9_col2, f9_col3, f9_col4,
          type = "text", no.space = TRUE)

# Table G.1
# Institution-type predictors only: didyouget2 (quasibinomial logit, full
# design) and log(totalmoney) (research-restricted design).
g1_col1 <- svyglm(
  didyouget2 ~ publicprivate + PhD,
  design = svy.dat_all,
  family = quasibinomial(link = "logit")
)
g1_col2 <- svyglm(
  log(totalmoney) ~ publicprivate + PhD,
  design = svy.dat_restrict2
)
stargazer(g1_col1, g1_col2, no.space = TRUE,
          type = "text")


# Table G.2
# Single-predictor models for usnews_invert, articles and otheroffers2:
# columns 1-3 model didyouget2, columns 4-6 model log(totalmoney).
g2_col1 <- svyglm(
  didyouget2 ~ usnews_invert,
  design = svy.dat_all,
  family = quasibinomial(link = "logit")
)

g2_col2 <- svyglm(
  didyouget2 ~ articles,
  design = svy.dat_all,
  family = quasibinomial(link = "logit")
)

g2_col3 <- svyglm(
  didyouget2 ~ otheroffers2,
  design = svy.dat_all,
  family = quasibinomial(link = "logit")
)

g2_col4 <- svyglm(log(totalmoney) ~ usnews_invert,
                  design = svy.dat_restrict2)

g2_col5 <- svyglm(log(totalmoney) ~ articles,
                  design = svy.dat_restrict2)

g2_col6 <- svyglm(log(totalmoney) ~ otheroffers2,
                  design = svy.dat_restrict2)

stargazer(g2_col1, g2_col2, g2_col3, g2_col4, g2_col5, g2_col6,
          no.space = TRUE, type = "text")

# Table G.3
# Career-stage (ABD, priorTT) and family (anyfamily) predictors: columns 1-2
# model didyouget2, columns 3-4 model log(totalmoney).
g3_col1 <- svyglm(
  didyouget2 ~ ABD + priorTT,
  design = svy.dat_all,
  family = quasibinomial(link = "logit")
)

g3_col2 <- svyglm(
  didyouget2 ~ anyfamily,
  design = svy.dat_all,
  family = quasibinomial(link = "logit")
)

g3_col3 <- svyglm(log(totalmoney) ~ ABD + priorTT,
                  design = svy.dat_restrict2)

g3_col4 <- svyglm(log(totalmoney) ~ anyfamily,
                  design = svy.dat_restrict2)

stargazer(g3_col1, g3_col2, g3_col3, g3_col4, no.space = TRUE,
          type = "text")


# Table G.4
# Gender alone, then gender with institution-type predictors, then gender
# with candidate-level predictors: columns 1-3 model didyouget2, columns 4-6
# model log(totalmoney) with gender interactions.
g4_col1 <- svyglm(
  didyouget2 ~ woman,
  design = svy.dat_all,
  family = quasibinomial(link = "logit")
)

g4_col2 <- svyglm(
  didyouget2 ~ publicprivate + PhD + woman,
  design = svy.dat_all,
  family = quasibinomial(link = "logit")
)

g4_col3 <- svyglm(
  didyouget2 ~ usnews_invert + articles + otheroffers2 + woman,
  design = svy.dat_all,
  family = quasibinomial(link = "logit")
)

g4_col4 <- svyglm(log(totalmoney) ~ woman,
                  design = svy.dat_restrict2)

g4_col5 <- svyglm(log(totalmoney) ~ publicprivate + PhD * woman,
                  design = svy.dat_restrict2)

g4_col6 <- svyglm(
  log(totalmoney) ~ usnews_invert * woman + articles * woman +
    otheroffers2 * woman,
  design = svy.dat_restrict2
)

stargazer(g4_col1, g4_col2, g4_col3, g4_col4, g4_col5, g4_col6,
          no.space = TRUE, type = "text")



# Table H.1
# The five Table 1 specifications with the race/ethnicity indicators black,
# latinx, asian, mena and native added after anyfamily.

race1 <- svyglm(
  didyouget2 ~
    publicprivate + PhD +
    articles + otheroffers2 +
    woman +
    ABD +
    priorTT +
    anyfamily + black + latinx + asian + mena + native,
  design = svy.dat_all,
  family = quasibinomial(link = "logit")
)

race2 <- svyglm(
  didyouget2 ~
    publicprivate + PhD +
    articles + otheroffers2 +
    usnews_invert +
    woman +
    ABD +
    priorTT +
    anyfamily + black + latinx + asian + mena + native,
  design = svy.dat_all,
  family = quasibinomial(link = "logit")
)

race3 <- svyglm(
  log(totalmoney) ~
    publicprivate + PhD * woman +
    articles * woman + otheroffers2 * woman +
    woman +
    ABD +
    priorTT +
    anyfamily + black + latinx + asian + mena + native,
  design = svy.dat_restrict2
)

race4 <- svyglm(
  log(totalmoney) ~
    publicprivate + PhD * woman +
    articles * woman + otheroffers2 * woman +
    usnews_invert * woman +
    woman +
    ABD +
    priorTT +
    anyfamily + black + latinx + asian + mena + native,
  design = svy.dat_restrict2
)

race5 <- svyglm(
  log(totalmoney) ~
    publicprivate +
    articles * woman + otheroffers2 * woman +
    usnews_invert * woman +
    woman +
    ABD +
    priorTT +
    anyfamily + black + latinx + asian + mena + native,
  design = svy.dat_R1_restrict2
)

stargazer(race1, race2, race3, race4, race5,
          type = "text", no.space = TRUE)


# end of file.